Executive Summary
The goal of this project is to determine the states that are most in need of electric vehicle stations based on a number of factors. These factors include the current number of stations, number of electric vehicle registrations, and state population. Results from this project should identify the states most in need of adding electric vehicle charging stations.
Part 1: Cleaning & Exploratory Data Analysis
import libraries and dataset. EV Station Data: https://afdc.energy.gov/data_download
library(tidyverse)
library(dplyr)
library(janitor)
library(Hmisc)
library(ggplot2)
# Load the AFDC alternative-fuel station export.
# guess_max = Inf makes readr inspect every row when inferring column types.
# With the default (a sample of leading rows) sparsely populated columns are
# mis-typed: the summary of this data shows the EV level-1 port count column
# parsed as logical (only TRUE/NA) instead of numeric.
alt_fuel_stations <- read_csv(
  "data/alt_fuel_stations (Jul 7 2025).csv",
  guess_max = Inf
)
alt_fuel_stations
set random state seed
# Fix the RNG seed so any later random operation is reproducible.
set.seed(seed = 42)
clean column names using janitor
# Normalise the column names with janitor::clean_names(), then print.
alt_fuel_stations_clean <- alt_fuel_stations |>
  clean_names()
alt_fuel_stations_clean
filter to only EV stations (“ELEC”)
# Keep only the rows whose fuel type code is "ELEC".
ev_stations <- alt_fuel_stations_clean |>
  filter(fuel_type_code == "ELEC")
ev_stations
take a look at the values
# Per-column summary of the filtered station table.
ev_stations |>
  summary()
fuel_type_code station_name street_address intersection_directions city state zip plus4 station_phone
Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Mode:logical Length:87392
Class :character Class :character Class :character Class :character Class :character Class :character Class :character NA's:87392 Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character
status_code expected_date groups_with_access_code access_days_time cards_accepted bd_blends ng_fill_type_code ng_psi ev_level1_evse_num
Length:87392 Mode:logical Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Mode:logical
Class :character NA's:87392 Class :character Class :character Class :character Class :character Class :character Class :character TRUE:46
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character NA's:87346
ev_level2_evse_num ev_dc_fast_count ev_other_info ev_network ev_network_web geocode_status latitude longitude date_last_confirmed
Min. : 1.000 Min. : 1.000 Mode:logical Length:87392 Length:87392 Length:87392 Min. :18.00 Min. :-162.29 Min. :2017-09-06
1st Qu.: 2.000 1st Qu.: 1.000 NA's:87392 Class :character Class :character Class :character 1st Qu.:34.42 1st Qu.:-117.75 1st Qu.:2025-07-07
Median : 2.000 Median : 2.000 Mode :character Mode :character Mode :character Median :39.72 Median : -87.64 Median :2025-07-07
Mean : 2.558 Mean : 4.268 Mean :39.27 Mean : -94.69 Mean :2025-04-13
3rd Qu.: 2.000 3rd Qu.: 6.000 3rd Qu.:43.05 3rd Qu.: -77.09 3rd Qu.:2025-07-07
Max. :102.000 Max. :120.000 Max. :68.38 Max. : -52.70 Max. :2025-07-07
NA's :13353 NA's :72204 NA's :212
id updated_at owner_type_code federal_agency_id federal_agency_name open_date hydrogen_status_link ng_vehicle_class lpg_primary
Min. : 1523 Length:87392 Length:87392 Mode:logical Mode:logical Min. :1995-08-30 Length:87392 Length:87392 Mode:logical
1st Qu.:174654 Class :character Class :character NA's:87392 NA's:87392 1st Qu.:2020-11-05 Class :character Class :character NA's:87392
Median :232795 Mode :character Mode :character Median :2022-07-08 Mode :character Mode :character
Mean :246649 Mean :2021-11-28
3rd Qu.:331404 3rd Qu.:2024-01-13
Max. :405532 Max. :2025-10-15
NA's :248
e85_blender_pump ev_connector_types country intersection_directions_french access_days_time_french bd_blends_french groups_with_access_code_french
Mode:logical Length:87392 Length:87392 Length:87392 Length:87392 Mode:logical Length:87392
NA's:87392 Class :character Class :character Class :character Class :character NA's:87392 Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character
hydrogen_is_retail access_code access_detail_code federal_agency_code facility_type cng_dispenser_num cng_on_site_renewable_source
Mode:logical Length:87392 Length:87392 Mode:logical Length:87392 Min. : NA Length:87392
NA's:87392 Class :character Class :character NA's:87392 Class :character 1st Qu.: NA Class :character
Mode :character Mode :character Mode :character Median : NA Mode :character
Mean :NaN
3rd Qu.: NA
Max. : NA
NA's :87392
cng_total_compression_capacity cng_storage_capacity lng_on_site_renewable_source e85_other_ethanol_blends ev_pricing ev_pricing_french lpg_nozzle_types
Min. : NA Min. : NA Mode:logical Length:87392 Length:87392 Length:87392 Length:87392
1st Qu.: NA 1st Qu.: NA NA's:87392 Class :character Class :character Class :character Class :character
Median : NA Median : NA Mode :character Mode :character Mode :character Mode :character
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :87392 NA's :87392
hydrogen_pressures hydrogen_standards cng_fill_type_code cng_psi cng_vehicle_class lng_vehicle_class ev_on_site_renewable_source restricted_access
Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Mode:logical Length:87392 Mode :logical
Class :character Class :character Class :character Class :character Class :character NA's:87392 Class :character FALSE:13273
Mode :character Mode :character Mode :character Mode :character Mode :character Mode :character TRUE :1805
NA's :72314
rd_blends rd_blends_french rd_blended_with_biodiesel rd_maximum_biodiesel_level nps_unit_name cng_station_sells_renewable_natural_gas
Length:87392 Mode:logical Length:87392 Min. : NA Mode:logical Mode:logical
Class :character NA's:87392 Class :character 1st Qu.: NA NA's:87392 NA's:87392
Mode :character Mode :character Median : NA
Mean :NaN
3rd Qu.: NA
Max. : NA
NA's :87392
lng_station_sells_renewable_natural_gas maximum_vehicle_class ev_workplace_charging funding_sources
Mode:logical Length:87392 Mode :logical Length:87392
NA's:87392 Class :character FALSE:86855 Class :character
Mode :character TRUE :534 Mode :character
NA's :3
select columns for study (a simple set at first). goal: find where the stations are and how many there are per state
# Narrow the table to the station identity, location and network columns.
ev_stations_selected <- select(
  ev_stations,
  station_name, city, state, zip, country,
  latitude, longitude, ev_network
)
ev_stations_selected
look at selected columns
# Per-column summary of the selected columns.
ev_stations_selected |>
  summary()
station_name city state zip country latitude longitude ev_network
Length:87392 Length:87392 Length:87392 Length:87392 Length:87392 Min. :18.00 Min. :-162.29 Length:87392
Class :character Class :character Class :character Class :character Class :character 1st Qu.:34.42 1st Qu.:-117.75 Class :character
Mode :character Mode :character Mode :character Mode :character Mode :character Median :39.72 Median : -87.64 Mode :character
Mean :39.27 Mean : -94.69
3rd Qu.:43.05 3rd Qu.: -77.09
Max. :68.38 Max. : -52.70
check for missing values
# Total number of NA cells across all selected columns.
ev_stations_selected |>
  is.na() |>
  sum()
[1] 0
now, just US market
# Restrict to rows whose country code is "US".
ev_stations_us <- ev_stations_selected |>
  filter(country == "US")
ev_stations_us
look at different networks (checking for Tesla and Electrify America)
# One-column table of network names, then a frequency count per network.
ev_networks <- ev_stations_us |>
  select(ev_network)
table(ev_networks)
ev_network
7CHARGE ABM AMPED_UP AMPUP APPLEGREEN AUTEL Blink Network
41 61 5 581 35 59 5484
BP_PULSE CHAEVI CHARGELAB CHARGENET ChargePoint Network CHARGESMART_EV CHARGEUP
51 3 188 3 41702 226 286
CHARGIE CIRCLE_K DIRT_ROAD ELECTRIC_ERA Electrify America Enel X Way ENVIROSPARK
7 86 1 17 1076 5 3
EPIC_CHARGING EV Connect EVBOLT EVCS EVGATEWAY eVgo Network EVIUM
17 1427 15 249 401 1079 18
EVMATCH EVOKE EVPASSPORT EVPOWER EVRANGE FCN FLASH
52 217 2 5 38 192 139
FLITWAY FLO FORD_CHARGE FPLEV GRAVITI_ENERGY GRAVITY_CHARGING_CENTER HONEY_BADGER
8 1072 202 49 64 1 1
HYPERFUEL IN_CHARGE IONNA JULE KWIK_CHARGE LOOP MATCHA
7 7 20 6 5 915 1
Non-Networked NOODOE OpConnect POWERFLEX POWERPORT_EVC POWERPUMP RED_E
5790 253 334 169 9 3 551
REVEL REVITALIZE RIVIAN_ADVENTURE RIVIAN_WAYPOINTS ROVE SHELL_RECHARGE STAY_N_CHARGE
6 6 119 190 1 1503 55
SWTCH Tesla Tesla Destination TURNONGREEN UNIVERSAL VIALYNK WATT_EV
219 2690 4717 66 256 599 5
ZEFNET
265
filter to only contiguous US for geospatial analysis
EXPORT FOR GEOSPATIAL ANALYSIS
# Drop Alaska, Puerto Rico and Hawaii, then list the state codes that remain.
ev_stations_geo <- filter(
  ev_stations_us,
  !(state %in% c("AK", "PR", "HI"))
)
unique(ev_stations_geo[["state"]])
[1] "CA" "VT" "WA" "OR" "IL" "ID" "WI" "IA" "TX" "SC" "CT" "OH" "WV" "MO" "UT" "KS" "FL" "MA" "CO" "MI" "NC" "VA" "TN" "AL" "AZ" "GA" "MD" "MN" "AR" "NJ" "RI" "PA" "LA"
[34] "DC" "NY" "ME" "NH" "KY" "NE" "MS" "SD" "DE" "IN" "OK" "NM" "MT" "ND" "NV" "WY"
# Export the filtered station locations for the geospatial analysis.
# NOTE(review): write.csv() also writes row names as a leading unnamed column
# by default; confirm downstream readers expect that before changing it.
write.csv(x = ev_stations_geo, file = "data/ev_station_locations.csv")
breakdown by state for upcoming joins
# Number of stations per state, largest first.
ev_stations_by_state <- ev_stations_us |>
  count(state, name = "total_stations", sort = TRUE)
ev_stations_by_state
for plotting (top 10 only)
# Top 10 states by station count.
# Reuses ev_stations_by_state (already sorted by total_stations, descending)
# instead of repeating the group/summarise/arrange pipeline, so the two
# tables cannot drift apart.
ev_stations_by_state_10 <- ev_stations_by_state |>
  head(10)
time to plot
# Bar chart of the ten states with the most stations, tallest bar first,
# with each bar labelled by its count; the chart is then saved to disk.
ggplot(
  ev_stations_by_state_10,
  aes(x = reorder(state, -total_stations), y = total_stations)
) +
  geom_col(fill = "steelblue") +
  geom_text(aes(label = total_stations), vjust = -0.2) +
  labs(
    title = "Top 10 States by Total EV Stations",
    x = "State",
    y = "EV Stations"
  )
ggsave(filename = "outputs/top_10_states_total_ev_stations.png")
visualize US charging stations
library(maps)
# Scatter every station over state outlines and save the map.
# ev_stations_geo (AK, HI and PR removed) is plotted rather than
# ev_stations_us: borders("state") only draws the contiguous US, so points
# outside it would sit off the outlines and stretch the axes.
ggplot(ev_stations_geo, aes(x = longitude, y = latitude)) +
  borders("state") +
  geom_point(alpha = 0.2, color = "green", size = 0.3) +
  coord_fixed(1.3) +
  labs(title = "US EV Charging Stations")
ggsave("outputs/us_ev_charging_station_map.png")
import data on the number of vehicle registrations by state. EV Registration Data: https://afdc.energy.gov/data/10962
# Read the per-state EV registration counts and normalise the column names.
ev_registrations <- read_csv("data/ev_registration_by_state.csv") |>
  clean_names()
ev_registrations
drop total and DC rows
# Remove the "District of Columbia" and "Total" rows.
ev_registrations_50 <- ev_registrations |>
  filter(!(state %in% c("District of Columbia", "Total")))
ev_registrations_50
change state names to abbreviations
# Replace full state names with postal abbreviations using the built-in
# state.name / state.abb lookup vectors (unmatched names become NA).
ev_registrations_50 <- ev_registrations_50 |>
  mutate(state = state.abb[match(state, state.name)])
ev_registrations_50
compare the total number of EV stations to the number of registrations by joining on state; drop PR and DC
# Attach registration counts to the per-state station totals.
# The join key is given explicitly so the join cannot silently pick up any
# other column the two tables might share (and no "Joining with" message).
# PR and DC are then removed.
ev_combined <- ev_stations_by_state |>
  left_join(ev_registrations_50, by = "state") |>
  filter(!(state %in% c("PR", "DC")))
ev_combined
sort by registrations, descending
# Sort by registration count, largest first.
# The column is referenced by bare name (data masking) rather than through
# ev_combined$..., which would read the global object instead of the piped
# data and breaks as soon as the pipeline input differs from that object.
ev_combined <- ev_combined |>
  arrange(desc(registration_count))
ev_combined
import population information
State Population Data: https://www.census.gov/data/tables/time-series/demo/popest/2020s-state-total.html
# Read the state population estimates and print them.
state_populations <- read_csv(file = "data/state_populations.csv")
state_populations
change state names to abbreviations, again
# Replace full state names with postal abbreviations using the built-in
# state.name / state.abb lookup vectors (unmatched names become NA).
state_populations <- state_populations |>
  mutate(state = state.abb[match(state, state.name)])
state_populations
join population data to existing combined dataset
# Attach population figures to the combined station/registration table.
# The join key is given explicitly so the join cannot silently pick up any
# other column the two tables might share.
ev_combined_pop <- ev_combined |>
  left_join(state_populations, by = "state")
ev_combined_pop
begin simple analysis
goal: calculate electric vehicles per station for each state
# Registered EVs per charging station (2 decimals), highest ratio first.
ev_combined_pop <- mutate(
  ev_combined_pop,
  evs_per_station = round(registration_count / total_stations, 2)
)
ev_combined_pop <- arrange(ev_combined_pop, desc(evs_per_station))
ev_combined_pop
goal: calculate number of people per station based on population
# Residents per charging station (2 decimals), highest ratio first.
ev_combined_pop <- mutate(
  ev_combined_pop,
  people_per_station = round(population / total_stations, 2)
)
ev_combined_pop <- arrange(ev_combined_pop, desc(people_per_station))
ev_combined_pop
goal: calculate percentage of population with an EV (rough estimate as population includes all ages and some owners may have more than one EV)
# EV registrations as a share of population (4 decimals), highest first.
ev_combined_pop <- mutate(
  ev_combined_pop,
  pop_with_ev = round(registration_count / population, 4)
)
ev_combined_pop <- arrange(ev_combined_pop, desc(pop_with_ev))
ev_combined_pop
# Keep a copy under a second name for later use.
ev_stats <- ev_combined_pop
export finalized data frame for modeling
# Write the final table under both file names (ev_stats is a copy of
# ev_combined_pop, so the two files carry the same data).
write.csv(x = ev_combined_pop, file = "data/ev_combined_pop.csv")
write.csv(x = ev_stats, file = "data/ev_stats.csv")
Visualize with a heat map: show the states with the largest ratio of EVs to available charging stations. These states may be underdeveloped and in need of additional EV station rollout. Yellow and orange states may need more stations based on the number of vehicles currently registered.
library(usmap)
# State choropleth of EVs per charging station, saved on a grey background.
plot_usmap(
  data = ev_combined_pop,
  values = "evs_per_station",
  color = "white"
) +
  scale_fill_viridis_c(option = "C") +
  labs(
    title = "EVs Per Charging Station in the US",
    fill = "EVs/Station"
  )
ggsave(filename = "outputs/evs_per_charging_station_us.png", bg = "grey")
Visualize with a heat map: show the states with the largest ratio of people to available charging stations. These states have the highest populations relative to the number of stations. Yellow and orange states may be in need of more EV stations based on state population.
# State choropleth of residents per charging station, saved on a grey
# background.
plot_usmap(
  data = ev_combined_pop,
  values = "people_per_station",
  color = "white"
) +
  scale_fill_viridis_c(option = "C") +
  labs(
    title = "People Per Charging Station in the US",
    fill = "People/Station"
  )
ggsave(filename = "outputs/people_per_charging_station_us.png", bg = "grey")
Visualize with a heat map: show the states with the highest EV saturation (EV registrations / population). Yellow and orange states could be considered the faster-growing markets compared to purple and blue states. As expected, California has the highest saturation of EVs, followed by other Pacific Coast states. Other states may see jumps in saturation in the coming decades and would require additional EV stations.
# State choropleth of EV registrations relative to population, saved on a
# grey background.
plot_usmap(
  data = ev_combined_pop,
  values = "pop_with_ev",
  color = "white"
) +
  scale_fill_viridis_c(option = "C") +
  labs(
    title = "EV Saturation by Population",
    fill = "EV Reg/Pop"
  )
ggsave(filename = "outputs/ev_saturation_by_population.png", bg = "grey")